import re

import requests
from bs4 import BeautifulSoup

# Request headers carrying a desktop-browser User-Agent string; passed to
# every requests.get() call in this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/134.0.0.0 '
                  'Safari/537.36 Edg/134.0.0.0'
}

# Download the chapter index page.
# NOTE: despite its name, `url` holds the Response object, not a URL string.
# The timeout keeps an unresponsive server from hanging the script forever.
url = requests.get('https://www.biqu03.cc/read/50125', headers=headers, timeout=10)
# Fail loudly on an HTTP error instead of parsing an error page and
# silently ending up with an empty chapter list.
url.raise_for_status()
html_content = url.content.decode()

# Index entries look like:
# <dd><a href ="/read/50125/993.html">第一百六十八章 问答</a></dd>
# Each match is a (chapter file name, chapter title) tuple.
result = re.findall('<dd><a href ="/read/50125/(.*?)">(.*?)</a></dd>', html_content)

# Fetch every chapter listed in `result` and write its title followed by its
# text into a single UTF-8 file.
with open('笔趣阁.txt', 'w', encoding='utf-8') as f:
    for chapter_path, result_title in result:
        # The timeout keeps one stalled chapter request from blocking the
        # whole run indefinitely.
        result_url = requests.get(
            f"https://www.biqu03.cc/read/50125/{chapter_path}",
            headers=headers,
            timeout=10,
        )
        print(f"正在爬取{result_url, result_title}")

        soup = BeautifulSoup(result_url.content.decode(), 'html.parser')
        novel_content_div = soup.find(id='chaptercontent')

        # find() returns None when the element is absent; check before
        # calling any method on it so a missing content area is reported
        # instead of raising AttributeError.
        if novel_content_div is None:
            print("未找到小说内容区域。")
            continue

        # Extract the text content, keeping line breaks between nodes.
        print("正在写入......")
        novel_content = novel_content_div.get_text(separator='\n')
        f.write(f"{result_title}\n{novel_content}\n\n")



